# API server script
from fastapi import FastAPI, HTTPException
from unsloth import FastLanguageModel
app = FastAPI()
model, tokenizer = FastLanguageModel.from_pretrained("ckpt/r1-final", load_in_4bit=True)

@app.post("/chat")
def chat(req: dict):
    inputs = tokenizer(req["prompt"], return_tensors="pt").to("cuda")
    out = model.generate(**inputs, max_new_tokens=1024)
    return {"response": tokenizer.decode(out[0])}
uvicorn main:app --host 0.0.0.0 --port 8000
